機器學習5-深度學習網路發展史及實作

林嶔 (Lin, Chin)

Lesson 23

深度學習網路發展史(1)

F24_1

– AlexNet -> VGGNet -> GoogLeNet -> ResNet

F24_2

深度學習網路發展史(2)

F24_3

  1. 使用ReLU做為非線性變換的激活函數

  2. 使用Dropout技術

  3. 使用overlap的max pooling

  4. 數據增強

  5. 使用GPU加速深度卷積網絡的訓練

深度學習網路發展史(3)

– VGGNet論文中全部使用了3x3的卷積核和2x2的池化核,通過不斷加深網絡結構來提升性能。

F24_4

  1. 越深的網絡效果越好

  2. 1x1的卷積核也顯著提升效能

深度學習網路發展史(4)

– 除此之外,他引入了Network In Network的概念,在卷積層內使用大量的1x1的卷積核。

F24_5

深度學習網路發展史(5)

F24_6

深度學習網路發展史(6)

– 假設有一個比較淺的網絡達到了飽和的準確率,那麼後面再加上幾個恆等映射(identity mapping)層,起碼誤差不會增加,因此設計出了Residual unit:

F24_7

F24_8

利用Pre-trained的model進行Feature extraction(1)

– 我們可以下載該模型進行預測

library(mxnet)
library(imager)
library(magrittr)

# Load the pre-trained residual network from the "model/resnet-18" files.
# The second argument (0) is presumably the checkpoint iteration -- confirm
# against the mx.model.load documentation.

res_model <- mx.model.load("model/resnet-18", 0)
res_sym <- mx.symbol.load("model/resnet-18-symbol.json")

#Define image processing functions

# Prepare an image for a 224 x 224 network input.
#
# Args:
#   im:         image array; dim(im)[1:2] are used as its two spatial sizes.
#   mean.image: value subtracted from the scaled pixels. When NULL, the
#               overall mean of the resized, scaled pixels is used instead.
#
# Returns:
#   The mean-subtracted array with one extra trailing dimension of length 1.
preproc.image <- function(im, mean.image = NULL) {
  shape <- dim(im)
  if (shape[1] != shape[2]) {
    # Non-square input: trim the longer side symmetrically so that a
    # centred square remains.
    short.edge <- min(shape[1:2])
    xx <- floor((shape[1] - short.edge) / 2)
    yy <- floor((shape[2] - short.edge) / 2)
    cropped <- crop.borders(im, xx, yy)
  } else {
    # Square input: append two singleton dimensions and rescale by the
    # maximum pixel value.
    cropped <- array(im, dim = c(shape, 1, 1))
    peak <- max(cropped)
    # An all-zero (blank) image has peak == 0; dividing by it would turn
    # every pixel into NaN, so the rescaling is skipped in that case only.
    if (!isTRUE(peak == 0)) {
      cropped <- cropped / peak
    }
  }
  # resize to 224 x 224, needed by input of the model.
  resized <- resize(cropped, 224, 224)
  # Scale by 255 and remove the third dimension.
  arr <- as.array(resized) * 255
  dim(arr) <- dim(arr)[-3]
  # subtract the mean
  if (is.null(mean.image)) {
    mean.image <- mean(arr)
  }
  normed <- arr - mean.image
  # Add a trailing dimension of length 1 (original note: format needed by
  # mxnet is width, height, channel, num).
  dim(normed) <- c(dim(normed), 1)
  return(normed)
}

# Load the parrots sample image that ships with imager and draw it on an
# empty plot with no margins, axes or box.

img <- system.file("extdata/parrots.png", package = "imager") %>% load.image()
par(mar = rep(0, 4))
plot(NA, xlim = c(0L, 1L), ylim = c(0L, 1L), xaxt = "n", yaxt = "n", bty = "n")
rasterImage(img, -0.04, -0.04, 1.04, 1.04, interpolate = FALSE)

# Crop, resize and mean-centre the image with preproc.image().

normed <- preproc.image(img)

# Run the loaded model on the CPU and show the position of the largest score.

prob <- predict(res_model, X = normed, ctx = mx.cpu())
which.max(prob)
## [1] 89

– 編號與Label對照表請訪問這裡

利用Pre-trained的model進行Feature extraction(2)

# Extract intermediate outputs from the loaded network.

all_layers <- res_sym$get.internals()

# Look up the two internal outputs by name and group them into one symbol,
# then bind that symbol for a single 224 x 224 x 3 input on the CPU.
flatten0_output <- all_layers$get.output(which(all_layers$outputs == "flatten0_output"))
fc1_output <- all_layers$get.output(which(all_layers$outputs == "fc1_output"))
out <- mx.symbol.Group(c(flatten0_output, fc1_output))
executor <- mx.simple.bind(symbol = out, data = c(224, 224, 3, 1), ctx = mx.cpu())

# Copy the model's parameters into the executor by name, feed in the
# preprocessed image as "data", and run a forward pass in inference mode.
mx.exec.update.arg.arrays(executor, res_model$arg.params, match.name = TRUE)
mx.exec.update.aux.arrays(executor, res_model$aux.params, match.name = TRUE)
mx.exec.update.arg.arrays(executor, list(data = mx.nd.array(normed)), match.name = TRUE)
mx.exec.forward(executor, is.train = FALSE)

# Pull the flatten0_output activations back into R and show their shape.
feature <- as.array(executor$ref.outputs$flatten0_output)
dim(feature)
## [1] 512   1

練習-1

# verification-1: recompute the fc1 layer by hand from the extracted feature
# and put it next to the executor's own fc1_output.

FC_COEF <- as.array(res_model$arg.params$fc1_weight)
FC_BIAS <- as.array(res_model$arg.params$fc1_bias)
VERIFICATION <- t(feature) %*% FC_COEF + FC_BIAS
FC1_OUTPUT <- t(as.matrix(as.array(executor$ref.outputs$fc1_output)))
rbind(FC1_OUTPUT, VERIFICATION) %>% t() %>% head()
##           [,1]      [,2]
## [1,] -2.762232 -2.762232
## [2,]  5.176768  5.176767
## [3,] -1.369514 -1.369514
## [4,] -4.658741 -4.658742
## [5,] -2.485924 -2.485924
## [6,] -3.269172 -3.269171
# verification-2: apply softmax to the hand-computed scores and compare the
# result with the probabilities returned by predict().
new.prob <- exp(VERIFICATION) / sum(exp(VERIFICATION))
cbind(prob, t(new.prob)) %>% head()
##              [,1]         [,2]
## [1,] 6.529687e-09 6.529699e-09
## [2,] 1.831285e-05 1.831288e-05
## [3,] 2.628705e-08 2.628711e-08
## [4,] 9.800515e-10 9.800529e-10
## [5,] 8.607785e-09 8.607802e-09
## [6,] 3.933069e-09 3.933076e-09

利用剛剛萃取出的Feature以SVM做手寫數字辨識訓練(1)

– 首先要先做檔案的前處理及特徵萃取

library(mxnet)
library(imager)
library(magrittr)

#Define image processing functions

# Prepare an image for a 224 x 224 network input.
#
# Args:
#   im:         image array; dim(im)[1:2] are used as its two spatial sizes.
#   mean.image: value subtracted from the scaled pixels. When NULL, the
#               overall mean of the resized, scaled pixels is used instead.
#
# Returns:
#   The mean-subtracted array with one extra trailing dimension of length 1.
preproc.image <- function(im, mean.image = NULL) {
    shape <- dim(im)
    if (shape[1] != shape[2]) {
        # Non-square input: trim the longer side symmetrically so that a
        # centred square remains.
        short.edge <- min(shape[1:2])
        xx <- floor((shape[1] - short.edge) / 2)
        yy <- floor((shape[2] - short.edge) / 2)
        cropped <- crop.borders(im, xx, yy)
    } else {
        # Square input: append two singleton dimensions and rescale by the
        # maximum pixel value.
        cropped <- array(im, dim = c(shape, 1, 1))
        peak <- max(cropped)
        # An all-zero (blank) image has peak == 0; dividing by it would turn
        # every pixel into NaN, so the rescaling is skipped in that case only.
        if (!isTRUE(peak == 0)) {
            cropped <- cropped / peak
        }
    }
    # resize to 224 x 224, needed by input of the model.
    resized <- resize(cropped, 224, 224)
    # Scale by 255 and remove the third dimension.
    arr <- as.array(resized) * 255
    dim(arr) <- dim(arr)[-3]
    # subtract the mean
    if (is.null(mean.image)) {
        mean.image <- mean(arr)
    }
    normed <- arr - mean.image
    # Add a trailing dimension of length 1 (original note: format needed by
    # mxnet is width, height, channel, num).
    dim(normed) <- c(dim(normed), 1)
    return(normed)
}

# Load the model parameters and the symbol definition from disk.
res_model <- mx.model.load("model/resnet-18", 0)
res_sym <- mx.symbol.load("model/resnet-18-symbol.json")

# Read the CSV as a numeric matrix. The first column is used as the label
# and the remaining columns as pixel values.
MNIST <- data.matrix(read.csv("data/train.csv", header = TRUE))

# Rows 1-300 form the training split.
train.x <- MNIST[1:300, -1]
train.y <- MNIST[1:300, 1]

# Rows 301-500 form the test split.
test.x <- MNIST[301:500, -1]
test.y <- MNIST[301:500, 1]

# Placeholders for the extracted features: one row per image, 512 columns.
train.x.array <- matrix(NA, nrow = 300, ncol = 512)
test.x.array <- matrix(NA, nrow = 200, ncol = 512)

# Build a symbol exposing the flatten0 and fc1 outputs and bind it for a
# single 224 x 224 x 3 input on the CPU.
all_layers <- res_sym$get.internals()

flatten0_output <- all_layers$get.output(which(all_layers$outputs == "flatten0_output"))
fc1_output <- all_layers$get.output(which(all_layers$outputs == "fc1_output"))
out <- mx.symbol.Group(c(flatten0_output, fc1_output))
executor <- mx.simple.bind(symbol = out, data = c(224, 224, 3, 1), ctx = mx.cpu())

# Copy the loaded parameters into the executor, matched by name.
mx.exec.update.arg.arrays(executor, res_model$arg.params, match.name = TRUE)
mx.exec.update.aux.arrays(executor, res_model$aux.params, match.name = TRUE)

#Get Features

# Run one row of pixel values through the bound executor and return the
# flatten0_output activations as a plain numeric vector.
#
# Args:
#   pixels: one row of train.x / test.x (reshaped here to 28 x 28 x 1).
#
# Uses the global `executor` and `preproc.image` defined earlier.
extract.feature <- function(pixels) {
  normed <- preproc.image(array(pixels, dim = c(28, 28, 1)))
  # Repeat the single channel three times to match the 3-channel input
  # shape the executor was bound with.
  normed <- normed[,,rep(1, 3),] %>% array(., dim = c(224, 224, 3, 1))
  mx.exec.update.arg.arrays(executor, list(data = mx.nd.array(normed)), match.name = TRUE)
  mx.exec.forward(executor, is.train = FALSE)
  as.numeric(as.array(executor$ref.outputs$flatten0_output))
}

# Loop bounds follow the actual number of rows in each split instead of
# being hard-coded, so they cannot drift from the split defined above.
for (i in seq_len(nrow(train.x))) {
  train.x.array[i, ] <- extract.feature(train.x[i, ])
}

for (i in seq_len(nrow(test.x))) {
  test.x.array[i, ] <- extract.feature(test.x[i, ])
}

利用剛剛萃取出的Feature以SVM做手寫數字辨識訓練(2)

library(e1071)
# Fit an SVM on the extracted features (labels as a factor), then
# cross-tabulate predicted against true labels on the training rows.
svm.model <- svm(x = train.x.array, y = factor(train.y))
pred.train.y <- predict(svm.model, train.x.array)
table(pred.train.y, train.y) %>% print()
##             train.y
## pred.train.y  0  1  2  3  4  5  6  7  8  9
##            0 41  0  0  0  0  0  0  0  0  0
##            1  0 33  0  0  0  0  0  0  0  0
##            2  0  0 32  0  0  0  0  0  0  0
##            3  0  0  0 30  0  0  0  0  0  0
##            4  0  0  0  0 28  0  0  0  0  0
##            5  0  0  0  0  0 21  0  0  0  0
##            6  0  0  0  0  0  0 30  0  1  0
##            7  0  0  0  0  0  1  0 29  0  0
##            8  0  0  1  0  0  0  0  0 22  0
##            9  0  0  0  0  0  0  0  0  0 31
# Cross-tabulate SVM predictions against true labels on the held-out rows.
pred.test.y <- predict(svm.model, test.x.array)
table(pred.test.y, test.y) %>% print()
##            test.y
## pred.test.y  0  1  2  3  4  5  6  7  8  9
##           0 15  0  0  0  0  0  1  1  1  2
##           1  0 17  0  0  0  0  0  1  0  0
##           2  0  0 24  1  0  7  0  0  0  0
##           3  0  0  2 16  0  8  0  0  1  1
##           4  0  0  0  0 23  0  1  0  0  0
##           5  0  0  1  1  0  5  0  0  0  0
##           6  0  0  1  0  0  0 14  0  2  0
##           7  0  0  1  0  0  0  0 16  0  1
##           8  0  0  1  0  0  0  0  0 14  1
##           9  0  0  0  0  3  0  0  0  0 17

練習-2

library(randomForest)
# Fit a random forest on the same extracted features, then cross-tabulate
# predicted against true labels on the training rows.
rf.model <- randomForest(x = train.x.array, y = factor(train.y))
pred.train.y <- predict(rf.model, train.x.array)
table(pred.train.y, train.y) %>% print()
##             train.y
## pred.train.y  0  1  2  3  4  5  6  7  8  9
##            0 41  0  0  0  0  0  0  0  0  0
##            1  0 33  0  0  0  0  0  0  0  0
##            2  0  0 33  0  0  0  0  0  0  0
##            3  0  0  0 30  0  0  0  0  0  0
##            4  0  0  0  0 28  0  0  0  0  0
##            5  0  0  0  0  0 22  0  0  0  0
##            6  0  0  0  0  0  0 30  0  0  0
##            7  0  0  0  0  0  0  0 29  0  0
##            8  0  0  0  0  0  0  0  0 23  0
##            9  0  0  0  0  0  0  0  0  0 31
# Cross-tabulate random forest predictions against true labels on the
# held-out rows.
pred.test.y <- predict(rf.model, test.x.array)
table(pred.test.y, test.y) %>% print()
##            test.y
## pred.test.y  0  1  2  3  4  5  6  7  8  9
##           0 15  0  0  0  0  0  1  0  1  2
##           1  0 17  0  0  0  0  0  1  0  0
##           2  0  0 21  1  0  5  1  0  0  0
##           3  0  0  3 16  0  6  1  0  2  1
##           4  0  0  1  0 22  0  1  0  0  1
##           5  0  0  0  1  0  7  0  0  0  0
##           6  0  0  0  0  2  0 12  0  2  1
##           7  0  0  1  0  0  0  0 16  0  0
##           8  0  0  3  0  0  2  0  0 13  1
##           9  0  0  1  0  2  0  0  1  0 16

利用Resnet的架構進行手寫數字辨識訓練(1)

– 首先要先做檔案的前處理(因為用CPU運算的原因,這裡僅做30個TRAIN DATA)

# Rebuild the training images and bring each one to 224 x 224.
MNIST <- data.matrix(read.csv("data/train.csv", header = TRUE))

# Only the first 30 rows are used; column 1 is the label, the rest pixels.
train.x <- MNIST[1:30, -1]
train.y <- MNIST[1:30, 1]

# Transpose so each image's pixels are contiguous, then view the data as
# 28 x 28 x 1 x (number of images).
train.x.array <- array(t(train.x), dim = c(28, 28, 1, nrow(train.x)))

# The target slice for each image has 3 channels; the result of
# preproc.image() is presumably recycled across them -- confirm.
norm_train.x.array <- array(NA, dim = c(224, 224, 3, nrow(train.x)))
for (i in seq_len(nrow(train.x))) {
  norm_train.x.array[, , , i] <- preproc.image(train.x.array[, , , i])
}

利用Resnet的架構進行手寫數字辨識訓練(2)

# Define the model: reuse the network up to flatten0_output and attach a new
# 10-unit output layer. `all_layers` is the internals symbol built earlier
# in the script.

flatten0_output <- all_layers$get.output(which(all_layers$outputs == "flatten0_output"))

# Fresh variables for the new fully connected layer.
fc1_weight <- mx.symbol.Variable("fc1_weight")
fc1_bias <- mx.symbol.Variable("fc1_bias")

# Batch normalisation, then the fully connected layer, then softmax output.
bn1 <- mx.symbol.BatchNorm(data = flatten0_output)
fc1 <- mx.symbol.FullyConnected(
  data = bn1,
  weight = fc1_weight,
  bias = fc1_bias,
  num_hidden = 10
)
softmax <- mx.symbol.SoftmaxOutput(fc1, name = "softmax")

# Train the model on the CPU with a fixed seed, recording the training
# metric in `logger`.

mx.set.seed(0)
logger <- mx.metric.logger$new()

cnn.model <- mx.model.FeedForward.create(
  softmax,
  X = norm_train.x.array,
  y = train.y,
  ctx = mx.cpu(),
  num.round = 30,
  array.batch.size = 10,
  learning.rate = 0.1,
  momentum = 0.9,
  wd = 0.00001,
  eval.metric = mx.metric.accuracy,
  epoch.end.callback = mx.callback.log.train.metric(100, logger)
)

利用Resnet的架構進行手寫數字辨識訓練(3)

# Predict images 7 to 10 of the training tensor. Each column of `prob` is
# treated as one image; subtracting 1 maps the position of the largest
# probability to a 0-based label.
pred.data <- norm_train.x.array[, , , 7:10]
prob <- predict(cnn.model, pred.data)
pred.label <- max.col(t(prob)) - 1
print(pred.label)
## [1] 4 7 7 3
# Draw the same four images (7 to 10) in a 2 x 2 grid with no margins.
par(mar = rep(0, 4), mfcol = c(2, 2))
for (i in seq_len(4)) {
  plot(NA, xlim = c(0L, 1L), ylim = c(0L, 1L), xaxt = "n", yaxt = "n", bty = "n")
  # Scale pixels by 1/255 and transpose before handing them to rasterImage.
  rasterImage(
    t(train.x.array[, , , 6 + i] / 255),
    -0.04, -0.04, 1.04, 1.04,
    interpolate = FALSE
  )
}

機器學習課程總結

– 想想牛頓、高斯、愛因斯坦他們有使用這些方法嗎?為什麼他們能精準地預測眾多物理、天文現象?

– 這些機器學習方法僅僅是在我們無法描述X與Y的關係時幫助我們做複雜函數的擬合,使我們能在不清楚他們的關係前精準地預測。

F24_9